require(dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Hmisc::getHdata(nhgh)
d1 <- nhgh %>%
filter(sex == "female") %>%
filter(age >= 18) %>%
select(gh, ht) %>%
filter(1:n()<=1000)
mm.gamma.shape = mean(d1$ht)^2/var(d1$ht)
mm.gamma.scale=var(d1$ht)/mean(d1$h)
mm.norm.mean=mean(d1$ht)
mm.norm.sd=sd(d1$ht)
mean.weib = function(lambda, k){
lambda*gamma(1 + 1/k)
}
lambda = function(samp.mean, k){
samp.mean/gamma(1+1/k)
}
var.weib = function(samp.mean, k,samp.var){
(lambda(samp.mean, k))^2*(gamma(1+2/k)-(gamma(1+1/k))^2)-samp.var
}
mm.opt = optimize(f = function(x){abs(var.weib(k = x, samp.mean = mean(d1$ht), samp.var = var(d1$ht)))},lower = 10, upper = 100)
mm.weib.k = mm.opt$minimum
mm.weib.lambda= lambda(samp.mean = mean(d1$ht), k=mm.weib.k)
hist(d1$ht, main = "Height of Adult Females: MM", breaks = 100, freq = FALSE)
curve(dgamma(x, shape = mm.gamma.shape, scale = mm.gamma.scale), add = TRUE, col = "red")
curve(dnorm(x, mm.norm.mean, mm.norm.sd), add = TRUE, col = "blue", lwd = 2)
curve(dweibull(x, shape = mm.weib.k, scale = mm.weib.lambda),add = TRUE, col = "green",lwd = 2)
plot(ecdf(d1$ht),main = "The CDF and ECDF of ht: MM")
curve(pgamma(x, shape = mm.gamma.shape, scale = mm.gamma.scale), add = TRUE, col = "red",lwd = 6)
curve(pnorm(x, mm.norm.mean, mm.norm.sd), add = TRUE, col = "blue", lwd = 4)
curve(pweibull(x, shape = mm.weib.k, scale = mm.weib.lambda),add = TRUE, col = "green",lwd = 4)
p = ppoints(300)
y = quantile(d1$ht, probs = p)
#gamma
x.gamma.mm = qgamma(p, shape = mm.gamma.shape, scale = mm.gamma.scale)
plot(x.gamma.mm,y,main = "Gamma QQplot")
abline(0,1)
#norm
x.norm.mm = qnorm(p, mm.norm.mean, mm.norm.sd)
plot(x.norm.mm,y,main = "Norm QQplot")
abline(0,1)
#weibull
x.pwei.mm=qweibull(p, shape = mm.weib.k, scale = mm.weib.lambda)
plot(x.pwei.mm,y,main = "Weibull QQplot")
abline(0,1)
median(d1$ht)
## [1] 160.8
qweibull(0.5, shape = mm.weib.k, scale = mm.weib.lambda)
## [1] 161.8065
qgamma(0.5, shape = mm.gamma.shape, scale = mm.gamma.scale)
## [1] 160.6308
qnorm(0.5, mm.norm.mean, mm.norm.sd)
## [1] 160.7419
#gamma
med.gam = NA
for (i in 1:1000) {
data = rgamma(200, shape = mm.gamma.shape, scale = mm.gamma.scale)
med.gam[i] = median(data)
}
hist(med.gam,breaks = 100,main = "Histogram of median of gamma distribution")
#norm
med.norm = NA
for (i in 1:1000) {
data = rnorm(200, mm.norm.mean, mm.norm.sd)
med.norm[i] = median(data)
}
hist(med.norm,breaks = 100,main = "Histogram of median of normal distribution")
#
med.wei = NA
for (i in 1:1000) {
data = rweibull(200, shape = mm.weib.k, scale = mm.weib.lambda)
med.wei[i] = median(data)
}
hist(med.wei,breaks = 100,main = "Histogram of median of weibull distribution")
diff(quantile(med.gam,probs = c(0.025,0.975)))
## 97.5%
## 2.455218
diff(quantile(med.norm,probs = c(0.025,0.975)))
## 97.5%
## 2.418323
diff(quantile(med.wei,probs = c(0.025,0.975)))
## 97.5%
## 2.448809
library(stats4)
nLL <- function(mean, sd){
fs <- dnorm(
x = d1$ht
,mean = mean
,sd = sd
,log = TRUE
)
-sum(fs)
}
fit <- mle(
nLL
,start = list(mean = 160, sd = 5)
,method = "L-BFGS-B"
,lower = c(0,0.01)
)
nLL.gamma <- function(shape, scale){
fs <- dgamma(
x = d1$ht
,shape = shape
,scale = scale
,log = TRUE
)
-sum(fs)
}
fit.gamma <- mle(
nLL.gamma
,start = list(shape = 1, scale = 1)
,method = "L-BFGS-B"
,lower = c(0,0.01)
)
nLL.weibull <- function(shape, scale){
fs <- dweibull(
x = d1$ht
,shape = shape
,scale = scale
,log = TRUE
)
-sum(fs)
}
fit.weibull <- mle(
nLL.weibull
,start = list(shape = 1, scale = 1)
,method = "L-BFGS-B"
,lower = c(0,0.01)
)
hist(d1$ht,breaks = 100, freq = FALSE,,main = "Overlay estimated pdf onto histogram of Normal distribution")
curve(dnorm(x,mean = coef(fit)[1],sd =coef(fit)[2]),col = "red" ,add = TRUE, lwd = 4)
hist(d1$ht,breaks = 100, freq = FALSE,,main = "Overlay estimated pdf onto histogram of Gamma distribution")
curve(dgamma(x,shape = coef(fit.gamma)[1],scale =coef(fit.gamma)[2]),col = "blue" ,add = TRUE, lwd = 4)
hist(d1$ht,breaks = 100, freq = FALSE,main = "Overlay estimated pdf onto histogram of Weibull distribution")
curve(dweibull(x,shape = coef(fit.weibull)[1],scale =coef(fit.weibull)[2]),col = "green" ,add = TRUE, lwd = 4)
plot(ecdf(d1$ht),,main = "Overlay estimated CDF onto eCDF of normal distribution")
curve(pnorm(x,mean = coef(fit)[1],sd =coef(fit)[2]),col = "red" ,add = TRUE, lwd = 4)
plot(ecdf(d1$ht),,main = "Overlay estimated CDF onto eCDF of gamma distribution")
curve(pgamma(x,shape = coef(fit.gamma)[1],scale =coef(fit.gamma)[2]),col = "blue" ,add = TRUE, lwd = 4)
plot(ecdf(d1$ht),,main = "Overlay estimated CDF onto eCDF of weibull distribution")
curve(pweibull(x,shape = coef(fit.weibull)[1],scale =coef(fit.weibull)[2]),col = "green" ,add = TRUE, lwd = 4)
qs <- seq(0.05,0.95,length = 100)
samp_qs <- quantile(d1$ht,qs)
theor_qs <- qnorm(qs,mean = coef(fit)[1],sd =coef(fit)[2])
plot(samp_qs,theor_qs)
abline(0,1)
qs <- seq(0.05,0.95,length = 100)
samp_qs.gamma <- quantile(d1$ht,qs)
theor_qs.gamma <- qgamma(qs,shape = coef(fit.gamma)[1],scale =coef(fit.gamma)[2])
plot(samp_qs.gamma,theor_qs.gamma)
abline(0,1)
qs <- seq(0.05,0.95,length = 100)
samp_qs.w <- quantile(d1$ht,qs)
theor_qs.w <- qweibull(qs,shape = coef(fit.weibull)[1],scale =coef(fit.weibull)[2])
plot(samp_qs.w,theor_qs.w)
abline(0,1)
qnorm(0.5,mean = coef(fit)[1],sd = coef(fit)[2])
## [1] 160.7419
qgamma(0.5,shape = coef(fit.gamma)[1],scale =coef(fit.gamma)[2])
## [1] 160.6312
qweibull(0.5,shape = coef(fit.weibull)[1],scale =coef(fit.weibull)[2])
## [1] 161.5156
M <- 500
N <- 1000
out <- rnorm(N*M,mean = coef(fit)[1],sd =coef(fit)[2]) %>% array(dim = c(M,N))
samp_dist <- apply(out,1,median)
hist(samp_dist,breaks = 100,,main = "Median Samp Dist (hist) of normal distribution")
M <- 500
N <- 1000
out <- rgamma(N*M,shape = coef(fit.gamma)[1],scale =coef(fit.gamma)[2]) %>% array(dim = c(M,N))
samp_dist.g <- apply(out,1,median)
hist(samp_dist.g,breaks = 100,main = "Median Samp Dist (hist) of gamma distribution")
M <- 500
N <- 1000
out <- rweibull(N*M,shape = coef(fit.weibull)[1],scale =coef(fit.weibull)[2]) %>% array(dim = c(M,N))
samp_dist.w <- apply(out,1,median)
hist(samp_dist.w,breaks = 100,main = "Median Samp Dist (hist) of weibull distribution")
### Range of middle 95% of Samp Dist
diff(quantile(samp_dist,c(0.05/2,1-0.05/2)))
## 97.5%
## 1.112256
diff(quantile(samp_dist.g,c(0.05/2,1-0.05/2)))
## 97.5%
## 1.130482
diff(quantile(samp_dist.w,c(0.05/2,1-0.05/2)))
## 97.5%
## 1.354879
#gamma
mm.gamma.shape.g = mean(d1$gh)^2/var(d1$gh)
mm.gamma.scale.g=var(d1$gh)/mean(d1$gh)
#normal
mm.norm.mean.g=mean(d1$gh)
mm.norm.sd.g=sd(d1$gh)
#weibull
mean.weib.g = function(lambda, k){
lambda*gamma(1 + 1/k)
}
lambda.g = function(samp.mean, k){
samp.mean/gamma(1+1/k)
}
var.weib.g = function(samp.mean, k,samp.var){
(lambda(samp.mean, k))^2*(gamma(1+2/k)-(gamma(1+1/k))^2)-samp.var
}
mm.opt.g = optimize(f = function(x){abs(var.weib(k = x, samp.mean = mean(d1$gh), samp.var = var(d1$gh)))},lower = 10, upper = 100)
mm.weib.k.g = mm.opt.g$minimum
mm.weib.lambda.g= lambda(samp.mean = mean(d1$gh), k=mm.weib.k)
hist(d1$gh, main = "Height of Adult Females: MM", breaks = 100, freq = FALSE,xlim = c(3,12))
curve(dgamma(x, shape = mm.gamma.shape.g, scale = mm.gamma.scale.g), add = TRUE, col = "red")
curve(dnorm(x, mm.norm.mean.g, mm.norm.sd.g), add = TRUE, col = "blue", lwd = 2)
curve(dweibull(x, shape = mm.weib.k.g, scale = mm.weib.lambda.g),add = TRUE, col = "green",lwd = 2)
plot(ecdf(d1$gh),main = "The CDF and ECDF of ht: MM")
curve(pgamma(x, shape = mm.gamma.shape.g, scale = mm.gamma.scale.g), add = TRUE, col = "red",lwd = 6)
curve(pnorm(x, mm.norm.mean.g, mm.norm.sd.g), add = TRUE, col = "blue", lwd = 4)
curve(pweibull(x, shape = mm.weib.k.g, scale = mm.weib.lambda.g),add = TRUE, col = "green",lwd = 4)
p = ppoints(300)
y.g = quantile(d1$gh, probs = p)
#gamma
x.gamma.mm.g = qgamma(p, shape = mm.gamma.shape.g, scale = mm.gamma.scale.g)
plot(x.gamma.mm.g,y.g,main = "Gamma QQplot")
abline(0,1)
#norm
x.norm.mm.g = qnorm(p, mm.norm.mean.g, mm.norm.sd.g)
plot(x.norm.mm.g,y.g,main = "Norm QQplot")
abline(0,1)
#weibull
x.pwei.mm.g=qweibull(p, shape = mm.weib.k.g, scale = mm.weib.lambda.g)
plot(x.pwei.mm.g,y.g,main = "Weibull QQplot")
abline(0,1)
median(d1$gh)
## [1] 5.5
qweibull(0.5, shape = mm.weib.k.g, scale = mm.weib.lambda.g)
## [1] 5.629781
qgamma(0.5, shape = mm.gamma.shape.g, scale = mm.gamma.scale.g)
## [1] 5.660259
qnorm(0.5, mm.norm.mean.g, mm.norm.sd.g)
## [1] 5.7246
#gamma
med.gam.g = NA
for (i in 1:1000) {
data = rgamma(200, shape = mm.gamma.shape.g, scale = mm.gamma.scale.g)
med.gam.g[i] = median(data)
}
hist(med.gam.g,breaks = 100,main = "Histogram of median of gamma distribution")
#norm
med.norm.g = NA
for (i in 1:1000) {
data = rnorm(200, mm.norm.mean.g, mm.norm.sd.g)
med.norm.g[i] = median(data)
}
hist(med.norm.g,breaks = 100,main = "Histogram of median of normal distribution")
#
med.wei.g = NA
for (i in 1:1000) {
data = rweibull(200, shape = mm.weib.k.g, scale = mm.weib.lambda.g)
med.wei.g[i] = median(data)
}
hist(med.wei.g,breaks = 100,main = "Histogram of median of weibull distribution")
diff(quantile(med.gam.g,probs = c(0.025,0.975)))
## 97.5%
## 0.3624233
diff(quantile(med.norm.g,probs = c(0.025,0.975)))
## 97.5%
## 0.3917406
diff(quantile(med.wei.g,probs = c(0.025,0.975)))
## 97.5%
## 0.2109234
library(stats4)
nLL.g <- function(mean, sd){
fs <- dnorm(
x = d1$gh
,mean = mean
,sd = sd
,log = TRUE
)
-sum(fs)
}
fit.g <- mle(
nLL.g
,start = list(mean = 1, sd = 1)
,method = "L-BFGS-B"
,lower = c(0,0.01)
)
nLL.gamma.g <- function(shape, scale){
fs <- dgamma(
x = d1$gh
,shape = shape
,scale = scale
,log = TRUE
)
-sum(fs)
}
fit.gamma.g <- mle(
nLL.gamma.g
,start = list(shape = 1, scale = 1)
,method = "L-BFGS-B"
,lower = c(0,0.01)
)
nLL.weibull.g <- function(shape, scale){
fs <- dweibull(
x = d1$gh
,shape = shape
,scale = scale
,log = TRUE
)
-sum(fs)
}
fit.weibull.g <- mle(
nLL.weibull.g
,start = list(shape = 1, scale = 1)
,method = "L-BFGS-B"
,lower = c(0,0.01)
)
hist(d1$gh,breaks = 100, freq = FALSE,xlim = c(4,12),main = "Overlay estimated pdf onto histogram of Normal distribution")
curve(dnorm(x,mean = coef(fit.g)[1],sd =coef(fit.g)[2]),col = "red" ,add = TRUE, lwd = 4)
hist(d1$gh,breaks = 100, freq = FALSE,,main = "Overlay estimated pdf onto histogram of Gamma distribution")
curve(dgamma(x,shape = coef(fit.gamma.g)[1],scale =coef(fit.gamma.g)[2]),col = "blue" ,add = TRUE, lwd = 4)
hist(d1$gh,breaks = 100, freq = FALSE,main = "Overlay estimated pdf onto histogram of Weibull distribution")
curve(dweibull(x,shape = coef(fit.weibull.g)[1],scale =coef(fit.weibull.g)[2]),col = "green" ,add = TRUE, lwd = 4)
plot(ecdf(d1$gh),,main = "Overlay estimated CDF onto eCDF of normal distribution")
curve(pnorm(x,mean = coef(fit.g)[1],sd =coef(fit.g)[2]),col = "red" ,add = TRUE, lwd = 4)
plot(ecdf(d1$gh),,main = "Overlay estimated CDF onto eCDF of gamma distribution")
curve(pgamma(x,shape = coef(fit.gamma.g)[1],scale =coef(fit.gamma.g)[2]),col = "blue" ,add = TRUE, lwd = 4)
plot(ecdf(d1$gh),,main = "Overlay estimated CDF onto eCDF of weibull distribution")
curve(pweibull(x,shape = coef(fit.weibull.g)[1],scale =coef(fit.weibull.g)[2]),col = "green" ,add = TRUE, lwd = 4)
qs <- seq(0.05,0.95,length = 100)
samp_qs.g <- quantile(d1$gh,qs)
theor_qs.g <- qnorm(qs,mean = coef(fit.g)[1],sd =coef(fit.g)[2])
plot(samp_qs.g,theor_qs.g)
abline(0,1)
qs <- seq(0.05,0.95,length = 100)
samp_qs.gamma.g <- quantile(d1$gh,qs)
theor_qs.gamma.g <- qgamma(qs,shape = coef(fit.gamma.g)[1],scale =coef(fit.gamma.g)[2])
plot(samp_qs.gamma.g,theor_qs.gamma.g)
abline(0,1)
qs <- seq(0.05,0.95,length = 100)
samp_qs.w.g <- quantile(d1$ht,qs)
theor_qs.w.g <- qweibull(qs,shape = coef(fit.weibull.g)[1],scale =coef(fit.weibull.g)[2])
plot(samp_qs.w.g,theor_qs.w.g)
abline(0,1)
qnorm(0.5,mean = coef(fit.g)[1],sd = coef(fit.g)[2])
## [1] 5.7246
qgamma(0.5,shape = coef(fit.gamma.g)[1],scale =coef(fit.gamma.g)[2])
## [1] 5.677983
qweibull(0.5,shape = coef(fit.weibull.g)[1],scale =coef(fit.weibull.g)[2])
## [1] 5.64902
M <- 500
N <- 1000
out.g <- rnorm(N*M,mean = coef(fit.g)[1],sd =coef(fit.g)[2]) %>% array(dim = c(M,N))
samp_dist.g.g <- apply(out.g,1,median)
hist(samp_dist.g.g,breaks = 100,,main = "Median Samp Dist (hist) of normal distribution")
M <- 500
N <- 1000
out.gamma.g <- rgamma(N*M,shape = coef(fit.gamma.g)[1],scale =coef(fit.gamma.g)[2]) %>% array(dim = c(M,N))
samp_dist.gamma.g <- apply(out.gamma.g,1,median)
hist(samp_dist.gamma.g,breaks = 100,main = "Median Samp Dist (hist) of gamma distribution")
M <- 500
N <- 1000
out.w.g <- rweibull(N*M,shape = coef(fit.weibull.g)[1],scale =coef(fit.weibull.g)[2]) %>% array(dim = c(M,N))
samp_dist.w.g <- apply(out.w.g,1,median)
hist(samp_dist.w.g,breaks = 100,main = "Median Samp Dist (hist) of weibull distribution")
diff(quantile(samp_dist.g.g,c(0.05/2,1-0.05/2)))
## 97.5%
## 0.1600192
diff(quantile(samp_dist.gamma.g,c(0.05/2,1-0.05/2)))
## 97.5%
## 0.130245
diff(quantile(samp_dist.w.g,c(0.05/2,1-0.05/2)))
## 97.5%
## 0.2410041